/**
 * Converts a PDF into spoken audio and publishes it as a download.
 *
 * Pipeline: read the blob -> extract text with PDF.js -> load meSpeak and
 * lamejs on demand -> synthesize speech chunk by chunk -> stitch the decoded
 * chunks together -> encode as WAV or MP3 -> hand an object URL to
 * `add_file_output` (defined elsewhere on the page).
 *
 * The output format is read from the `input[name="0"]` form field and
 * defaults to `mp3`; only the value `wav` selects WAV output.
 * Every failure is reported to the user with `alert` and logged to the
 * console; the function itself returns nothing and never throws
 * asynchronously.
 *
 * @param {Blob} blob - The PDF file contents.
 * @param {string} fileName - Original file name; a trailing `.pdf` is
 *   replaced by the output extension.
 * @returns {void}
 */
function processFile(blob, fileName) {
  /**
   * Splits text on whitespace into chunks of at most `maxChars` characters.
   * A single word longer than the limit becomes its own (oversized) chunk.
   */
  function splitText(text, maxChars) {
    const limit = maxChars || 1200;
    const words = (text || '').split(/\s+/).filter(Boolean);
    const chunks = [];
    let current = '';
    words.forEach(function (word) {
      const candidate = current ? current + ' ' + word : word;
      if (candidate.length > limit) {
        if (current) {
          chunks.push(current);
        }
        current = word;
      } else {
        current = candidate;
      }
    });
    if (current) {
      chunks.push(current);
    }
    return chunks;
  }

  /** Serializes an AudioBuffer as a 16-bit PCM RIFF/WAVE blob. */
  function audioBufferToWavBlob(audioBuffer) {
    const numChannels = audioBuffer.numberOfChannels;
    const sampleRate = audioBuffer.sampleRate;
    const numSamples = audioBuffer.length;
    const bytesPerFrame = numChannels * 2;
    const dataSize = numSamples * bytesPerFrame;
    const buf = new ArrayBuffer(44 + dataSize);
    const view = new DataView(buf);
    const writeAscii = function (offset, text) {
      for (let k = 0; k < text.length; k++) {
        view.setUint8(offset + k, text.charCodeAt(k));
      }
    };

    // 44-byte canonical WAV header (little-endian fields).
    writeAscii(0, 'RIFF');
    view.setUint32(4, 36 + dataSize, true);
    writeAscii(8, 'WAVE');
    writeAscii(12, 'fmt ');
    view.setUint32(16, 16, true); // fmt chunk size
    view.setUint16(20, 1, true); // audio format 1 = PCM
    view.setUint16(22, numChannels, true);
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, sampleRate * bytesPerFrame, true); // byte rate
    view.setUint16(32, bytesPerFrame, true); // block align
    view.setUint16(34, 16, true); // bits per sample
    writeAscii(36, 'data');
    view.setUint32(40, dataSize, true);

    // Fetch each channel once instead of once per sample.
    const channels = [];
    for (let ch = 0; ch < numChannels; ch++) {
      channels.push(audioBuffer.getChannelData(ch));
    }

    let offset = 44;
    for (let s = 0; s < numSamples; s++) {
      for (let ch = 0; ch < numChannels; ch++) {
        const v = Math.max(-1, Math.min(1, channels[ch][s]));
        view.setInt16(offset, v < 0 ? v * 32768 : v * 32767, true);
        offset += 2;
      }
    }
    return new Blob([buf], { type: 'audio/wav' });
  }

  /** Converts float samples in [-1, 1] to clamped 16-bit integers. */
  function floatTo16Bit(samples) {
    const converted = new Int16Array(samples.length);
    for (let k = 0; k < samples.length; k++) {
      converted[k] = Math.max(-32768, Math.min(32767, samples[k] * 32767));
    }
    return converted;
  }

  /** Encodes an AudioBuffer (first one or two channels) as a 128 kbps MP3 blob using lamejs. */
  function audioBufferToMp3Blob(audioBuffer) {
    const isStereo = audioBuffer.numberOfChannels > 1;
    const leftInt = floatTo16Bit(audioBuffer.getChannelData(0));
    const rightInt = isStereo ? floatTo16Bit(audioBuffer.getChannelData(1)) : null;
    const encoder = new lamejs.Mp3Encoder(isStereo ? 2 : 1, audioBuffer.sampleRate, 128);
    const blockSize = 1152; // samples per encodeBuffer call
    const mp3Data = [];

    for (let start = 0; start < leftInt.length; start += blockSize) {
      const leftChunk = leftInt.subarray(start, start + blockSize);
      const encoded = rightInt
        ? encoder.encodeBuffer(leftChunk, rightInt.subarray(start, start + blockSize))
        : encoder.encodeBuffer(leftChunk);
      if (encoded.length > 0) {
        mp3Data.push(encoded);
      }
    }
    const tail = encoder.flush();
    if (tail.length > 0) {
      mp3Data.push(tail);
    }
    return new Blob(mp3Data, { type: 'audio/mp3' });
  }

  /** Reads every page of the PDF in order and resolves with the joined text. */
  function extractPdfText(pdf) {
    const pages = [];
    let chain = Promise.resolve();
    for (let pageNo = 1; pageNo <= pdf.numPages; pageNo++) {
      chain = chain
        .then(function () {
          return pdf.getPage(pageNo);
        })
        .then(function (page) {
          return page.getTextContent();
        })
        .then(function (textContent) {
          const pageText = (textContent.items || [])
            .map(function (item) {
              return item.str || '';
            })
            .join(' ')
            .replace(/\s+/g, ' ')
            .trim();
          if (pageText) {
            pages.push(pageText);
          }
        });
    }
    return chain.then(function () {
      return pages.join('\n\n');
    });
  }

  /** Loads meSpeak + lamejs and the en-us voice; rejects if any of them fails. */
  function loadSpeechEngine() {
    return Promise.all([
      loadScriptPromise('/js/mespeak/mespeak.js'),
      loadScriptPromise('https://cdn.jsdelivr.net/npm/lamejs@1.2.1/lame.min.js'),
    ]).then(function () {
      return new Promise(function (resolve, reject) {
        meSpeak.loadConfig('/js/mespeak/mespeak_config.json');
        meSpeak.loadVoice('/js/mespeak/voices/en/en-us.json', function (ok) {
          if (ok === false) {
            reject(new Error('meSpeak voice could not be loaded'));
          } else {
            resolve();
          }
        });
      });
    });
  }

  /**
   * Decodes encoded audio bytes into an AudioBuffer.
   * Uses the callback form because prefixed (webkit) contexts do not return
   * a promise; when a promise is returned it is wired up as well so that its
   * rejection is never left unhandled.
   */
  function decodeAudio(audioCtx, arrayBuffer) {
    return new Promise(function (resolve, reject) {
      const maybePromise = audioCtx.decodeAudioData(arrayBuffer, resolve, reject);
      if (maybePromise && typeof maybePromise.then === 'function') {
        maybePromise.then(resolve, reject);
      }
    });
  }

  /** Synthesizes each text chunk sequentially and resolves with the decoded buffers. */
  function synthesizeParts(chunks, audioCtx) {
    const partBuffers = [];
    let seq = Promise.resolve();
    chunks.forEach(function (part) {
      seq = seq.then(function () {
        const wavData = meSpeak.speak(part, {
          rawdata: 'array',
          amplitude: 90,
          speed: 165,
          wordgap: 0,
          variant: 'f3',
        });
        const wavBytes = wavData instanceof Uint8Array ? wavData : new Uint8Array(wavData || []);
        // Anything shorter than a WAV header cannot be valid audio.
        if (wavBytes.length < 44) {
          throw new Error('bad wav');
        }
        // Copy exactly the bytes the view covers; the backing buffer may be larger.
        const wavCopy = wavBytes.buffer.slice(
          wavBytes.byteOffset,
          wavBytes.byteOffset + wavBytes.byteLength
        );
        return decodeAudio(audioCtx, wavCopy).then(function (decoded) {
          partBuffers.push(decoded);
        });
      });
    });
    return seq.then(function () {
      return partBuffers;
    });
  }

  /** Concatenates decoded buffers into one buffer shaped like the first part. */
  function concatBuffers(audioCtx, partBuffers) {
    const numChannels = partBuffers[0].numberOfChannels;
    const sampleRate = partBuffers[0].sampleRate;
    const totalLength = partBuffers.reduce(function (sum, part) {
      return sum + part.length;
    }, 0);
    const joined = audioCtx.createBuffer(numChannels, totalLength, sampleRate);
    for (let ch = 0; ch < numChannels; ch++) {
      const dest = joined.getChannelData(ch);
      let pos = 0;
      partBuffers.forEach(function (part) {
        // Parts with fewer channels reuse their last channel.
        const src = part.getChannelData(Math.min(ch, part.numberOfChannels - 1));
        dest.set(src, pos);
        pos += src.length;
      });
    }
    return joined;
  }

  /** Releases the audio context; cleanup is best-effort and must never surface an error. */
  function closeAudioContext(audioCtx) {
    try {
      if (audioCtx && typeof audioCtx.close === 'function') {
        const closing = audioCtx.close();
        if (closing && typeof closing.catch === 'function') {
          closing.catch(function () {});
        }
      }
    } catch (err) {
      // Ignored on purpose: the download has already been produced or reported.
    }
  }

  /** Synthesizes, encodes and publishes the audio. Always resolves; failures are alerted. */
  function generateAudio(allText, outputFormat) {
    let audioCtx = null;
    return Promise.resolve()
      .then(function () {
        audioCtx = new (window.AudioContext || window.webkitAudioContext)();
        return synthesizeParts(splitText(allText, 1000), audioCtx);
      })
      .then(function (partBuffers) {
        if (!partBuffers.length) {
          alert('Could not generate audio from this PDF.');
          return;
        }
        const joined = concatBuffers(audioCtx, partBuffers);
        if (outputFormat === 'wav') {
          const wavBlob = audioBufferToWavBlob(joined);
          add_file_output(URL.createObjectURL(wavBlob), fileName.replace(/\.pdf$/i, '.wav'));
          return;
        }
        const mp3Blob = audioBufferToMp3Blob(joined);
        add_file_output(URL.createObjectURL(mp3Blob), fileName.replace(/\.pdf$/i, '.mp3'));
      })
      .catch(function (err) {
        console.error(err);
        alert('Could not generate audio from this PDF. Try a smaller PDF or WAV output.');
      })
      .then(function () {
        closeAudioContext(audioCtx);
      });
  }

  /** Loads the speech libraries, then generates audio. Always resolves; failures are alerted. */
  function convertTextToAudio(allText, outputFormat) {
    // Two-argument then(): the rejection handler only covers loading, so a
    // failure inside generateAudio is never misreported as a load failure.
    return loadSpeechEngine().then(
      function () {
        return generateAudio(allText, outputFormat);
      },
      function (err) {
        console.error(err);
        alert('Could not load audio components in your browser.');
      }
    );
  }

  const reader = new FileReader();

  reader.onerror = function () {
    console.error(reader.error);
    alert('Could not read this PDF file.');
  };

  reader.onload = function (e) {
    PDFJS.workerSrc = '/js/pdf.worker.js';
    const outputFormat = String($('input[name="0"]').val() || 'mp3').toLowerCase();

    PDFJS.getDocument(new Uint8Array(e.target.result))
      .then(extractPdfText)
      .then(function (allText) {
        if (!allText) {
          alert('No readable text found in this PDF.');
          return undefined;
        }
        return convertTextToAudio(allText, outputFormat);
      })
      .catch(function (err) {
        console.error(err);
        alert('Could not read this PDF file.');
      });
  };

  reader.readAsArrayBuffer(blob);
}

// Cache of in-flight / completed script loads, keyed by URL.
// Kept as `var` so it remains a property of the global object in classic scripts.
var _loadedScripts = {};

/**
 * Injects a <script> tag for `url` once and returns a promise for its load.
 * Repeated calls with the same URL share one promise. A failed load is
 * evicted from the cache so that a later call can retry.
 *
 * @param {string} url - Script URL to load.
 * @returns {Promise} Resolves on the script's `load` event, rejects on `error`.
 */
function loadScriptPromise(url) {
  if (_loadedScripts[url]) {
    return _loadedScripts[url];
  }
  const pending = new Promise(function (resolve, reject) {
    const script = document.createElement('script');
    script.src = url;
    script.onload = resolve;
    script.onerror = function (err) {
      // Do not keep a permanently rejected promise in the cache.
      if (_loadedScripts[url] === pending) {
        delete _loadedScripts[url];
      }
      reject(err);
    };
    document.head.appendChild(script);
  });
  _loadedScripts[url] = pending;
  return pending;
}

/**
 * Replaces every match of `find` in `str` with `replace`.
 *
 * NOTE: `find` is compiled as a regular-expression source, so characters
 * such as `.` or `(` keep their regex meaning, and `$` sequences in
 * `replace` are interpreted by `String.prototype.replace`.
 *
 * @param {string} find - Regular-expression source to search for.
 * @param {string} replace - Replacement text.
 * @param {string} str - Input string.
 * @returns {string} The string with all matches replaced.
 */
function replaceAll(find, replace, str) {
  return str.replace(new RegExp(find, 'g'), replace);
}

/**
 * Re-indents JSON-like text: puts a line break after every `{` and `,` and
 * before every `}` that occurs outside double-quoted strings, indenting by
 * brace depth via `GetTabs`. Whitespace directly around those delimiters is
 * discarded; all other characters are copied unchanged. Lines are separated
 * by a carriage return (`\r`).
 *
 * Inside a quoted string a backslash escapes the following character, so
 * `\"` does not end the string while `\\"` does.
 *
 * @param {string} str - Text to format.
 * @returns {string} The formatted text.
 */
function beautify(str) {
  const isBlank = function (ch) {
    return ch === ' ' || ch === '\r' || ch === '\n' || ch === '\t';
  };
  const length = str.length;
  let result = '';
  let depth = 0; // opening braces seen minus closing braces seen
  let withinQuotes = false;
  let i = 0;

  while (i < length) {
    const c = str[i];

    // Copy an escape pair verbatim so an escaped quote cannot toggle the state.
    if (withinQuotes && c === '\\') {
      result += str.slice(i, i + 2);
      i += 2;
      continue;
    }

    if (c === '"') {
      withinQuotes = !withinQuotes;
    }

    if (withinQuotes || (c !== '{' && c !== '}' && c !== ',')) {
      result += c;
      i++;
      continue;
    }

    // Look back: drop whitespace already emitted before the delimiter.
    let end = result.length;
    while (end > 0 && isBlank(result[end - 1])) {
      end--;
    }
    result = result.slice(0, end);

    if (c === '{') {
      depth++;
      result += c + '\r' + GetTabs(depth);
    } else if (c === '}') {
      depth--;
      result += '\r' + GetTabs(depth) + c;
    } else {
      result += c + '\r' + GetTabs(depth);
    }

    // Look ahead: skip whitespace that follows the delimiter in the input.
    while (i + 1 < length && isBlank(str[i + 1])) {
      i++;
    }
    i++;
  }
  return result;
}

/**
 * Builds the indentation for a nesting level: one space per level.
 * Counts of zero or less (unbalanced input) yield an empty string.
 *
 * @param {number} count - Nesting depth.
 * @returns {string} The indentation string.
 */
function GetTabs(count) {
  return ' '.repeat(Math.max(0, Math.ceil(count) || 0));
}